Two methods for plotting spatial data: either the traditional
plot function or the spplot tool. Overview of the
datasets used in this lecture:
meuse - Data frame of 155 locations around the river Meuse, at which
measurements of heavy metal concentrations were taken.
meuse.riv - A 176 x 2 matrix with identical first and
last rows; river coordinates for a Polygon object.
meuse.grid - Data frame with 3103 rows. Each row
corresponds to a pixel of the grid of the river bank.
### The plot function for Spatial Data
library(sp)
data(meuse) # loading the meuse river dataset
# Promote the plain data frame to a spatial object by declaring which
# columns hold the coordinates; meuse becomes a SpatialPointsDataFrame.
coordinates(meuse) <- c("x", "y") # Sets up meuse as a SpatialPointsDataFrame object
# Default plot method for SpatialPoints: one symbol per sampled location
plot(meuse); title("Points")
### SpatialPolygons
data(meuse.riv)
# Build the sp polygon hierarchy bottom-up:
# Polygon (one ring) -> Polygons (ring set + ID) -> SpatialPolygons.
meuse.lst <- list(Polygons( # Initialize Polygons object with ID = "meuse.riv"
list(Polygon(meuse.riv)),
"meuse.riv"))
meuse.pol <- SpatialPolygons(meuse.lst) # Initialize SpatialPolygons object
# Filled polygon plot of the river outline
plot(meuse.pol, col = "grey")
title("Polygons")
### SpatialPixels
data(meuse.grid)
# Declare the coordinate columns, then cast to SpatialPixels so the
# points are interpreted as cells of a regular grid.
coordinates(meuse.grid) <- c("x", "y")
meuse.grid <- as(meuse.grid, # Recall we need to use the as() function to cast
"SpatialPixels") ## as a SpatialPixels object
image(meuse.grid, col = "grey") # Use image to plot gridded data
title("Grid")
First, set up the grid, then add the river as a Polygon, and lastly add the points where the data was sampled. Now, we can see where the points were sampled in relation to the river.
# Layered base-graphics plot: grid as the background image, then the
# river polygon and the sample points drawn on top with add = TRUE.
image(meuse.grid, col = "khaki2")
plot(meuse.pol,
col = "lightsteelblue2",
add = TRUE) # To add the plots to the image, set `add = TRUE`
plot(meuse, add = TRUE,
col = "brown", cex = .5) # Plot points with scale factor = .5
Suppose we want to illustrate zinc concentration on the map - we can use different symbol sizes (or colors) to represent the zinc levels at the 155 locations, with larger symbols for higher levels.
# Proportional-symbol ("bubble") map of zinc concentration: circle
# radius scales with sqrt(zinc) so symbol AREA is proportional to the
# measured value.
image(meuse.grid, col = "lightgray")
plot(meuse.pol, add = TRUE)
plot(meuse, pch = 1, cex = sqrt(meuse$zinc)/20, add = TRUE)
# Legend symbols use the same sqrt scaling so sizes are comparable
legVals <- c(100, 200, 500, 1000, 2000)
# NOTE: use the full argument name `y.intersp` (the original `y.inter`
# relied on partial argument matching, which is fragile).
legend("left", legend = legVals, pch = 1, pt.cex = sqrt(legVals)/20,
bty = "n",
title = "measured, ppm", cex = 0.8, y.intersp = 0.9)
title("Measured and interpolated zinc")
### spplot
We now use the spplot tool instead of the default R
plot function. Suppose we want to visualize levels of all
four metals in a single plot, each panel corresponding to one metal
type. A color palette can be supplied through the col.regions
argument.
# Create a color palette with n=7 colors
library(RColorBrewer)
# 7-color sequential palette; darker = higher concentration
grys <- brewer.pal(7, "Reds") # Alternatives: Try "Blues", "Greens", "Oranges"
# Class boundaries for the raw ppm values (use `<-`, not `=`, for
# top-level assignment)
cuts <- c(0, 20, 50, 200, 500, 2000)
# One panel per metal; spplot returns a lattice object, so print() is
# needed to render it inside scripts/knitr chunks.
print(spplot(meuse,
c("cadmium", "copper", "lead", "zinc"),
key.space="right", main = "ppm (Raw)",
layout=c(4,1), # layout = c(cols,rows)
cex = .5, cuts = cuts, col.regions=grys)) # Input color palette in the col.regions argument
# Creating new columns for standardized values in data.frame
# Scale to mean zero and var = 1; scale() returns a 1-column matrix,
# so as.vector() flattens it back to a plain numeric vector.
# (Use `<-` for assignment, per R style convention.)
meuse$lead.st <- as.vector(scale(meuse$lead))
meuse$zinc.st <- as.vector(scale(meuse$zinc))
meuse$copper.st <- as.vector(scale(meuse$copper))
meuse$cadmium.st <- as.vector(scale(meuse$cadmium))
# Breaks on the standardized (z-score) scale
cuts <- c(-1.2, 0, 1, 2, 3, 5) # We will see later techniques for selecting cuts
print(spplot(meuse,
c("cadmium.st", "copper.st", "lead.st", "zinc.st"),
key.space="right", main = "Standardised",
layout=c(4,1),
cex = .5, cuts = cuts, col.regions=grys))
Note that unlike the plot function, spplot
is not suitable for sequential adding of new elements - we need to nest
the new elements within the plot command. We include them in the
sp.layout argument.
The layout for each new element has three components:
SpatialPoints, SpatialLines,
SpatialPolygons (e.g. meuse.pol) or text.
river <- list("sp.polygons", meuse.pol)
# Semi-transparent grid polygon used as a background layer
bank <- list("sp.polygons", meuse.grid, col="lightgray", alpha=0.2)
# NOTE(review): `river` must already be defined as
# list("sp.polygons", meuse.pol) before this call — confirm it is set
# in the preceding chunk.
print(spplot(meuse,
c("cadmium.st", "copper.st", "lead.st", "zinc.st"),
key.space="right", main = "standardised",
layout=c(4,1), sp.layout=list(river, bank), # Include the `sp.layout` argument for new elements
cex = .5, cuts = cuts, col.regions=grys))
# Use the which argument to a layout item to specify which panels the item should be added to
river <- list("sp.polygons", meuse.pol, which=2) # e.g. only add river to panel 2
print(spplot(meuse,
c("cadmium.st", "copper.st", "lead.st", "zinc.st"),
key.space="right", main = "standardised",
layout=c(4,1), sp.layout=list(river, bank),
cex = .5, cuts = cuts, col.regions=grys))
#### Arguments in sp.layout
# rainbow() generates an evenly-spaced hue palette (deterministic, not
# random) — rainbow palettes are generally not advisable for ordered data.
rainbow(n=3)
## [1] "#FF0000" "#00FF00" "#0000FF"
# cm.colors: base R cyan-to-magenta diverging palette
col.out <- cm.colors(n=7)
print(spplot(meuse, "cadmium.st", key.space="right",
main = "standardised",
sp.layout=river, cuts = cuts,
col.regions=col.out))
# colorRampPalette returns a FUNCTION that interpolates n colors
# between the given endpoints.
rw.colors <- colorRampPalette(c("red", "blue"))
print(spplot(meuse, "cadmium.st", key.space="right",
main = "standardised",
sp.layout=river, cuts = cuts,
col.regions=rw.colors(7)))
# display.brewer.all() # shows every RColorBrewer palette
To address how to select the appropriate cuts, use methods defined
in the classInt package. Call the
classIntervals(...) function with the argument
style = one of the following:
"quantile" - Split by quantiles (equal counts per class).
"fisher" - Natural breaks using the Jenks(-Fisher)
algorithm; break-points are selected so that observations that are most
alike are grouped together. (Preferred method.)
"equal" - Equal-width intervals.
Observe in the plot for the Fisher-Jenks method that values are distributed more uniformly across class intervals.
library(classInt)
pal <- brewer.pal(n=5, "Reds") # RColorBrewer
# Quantile breaks: each class gets (roughly) the same number of
# observations (155 / 5 = 31 per class below).
q5 <- classIntervals(meuse$zinc, n=5, style="quantile"); q5 # n defines no. of breaks required
## style: quantile
## one of 14,891,626 possible partitions of this variable into 5 classes
## [113,186.8) [186.8,246.4) [246.4,439.6) [439.6,737.2) [737.2,1839]
## 31 31 31 31 31
# Fisher-Jenks natural breaks: minimizes within-class variance, so
# class counts are unequal for skewed data.
fj5 <- classIntervals(meuse$zinc, n=5, style="fisher"); fj5 ## advised to be not more than 6
## style: fisher
## one of 14,891,626 possible partitions of this variable into 5 classes
## [113,307.5) [307.5,573) [573,869.5) [869.5,1286.5) [1286.5,1839]
## 75 32 29 12 7
# Side-by-side ECDF-style plots of the two classifications
par(mfrow = c(1, 2))
plot(q5, pal = pal, main = "Quantile")
plot(fj5, pal = pal, main = "Fisher-Jenks")
require(gridExtra)
## Loading required package: gridExtra
# Pass the break vectors to spplot's cuts argument, then arrange the
# two lattice plots on one page with grid.arrange().
p1 <- spplot(meuse, "zinc", cuts=q5$brks, col.regions=pal, key.space="left",
main="Quantiles")
p2 <- spplot(meuse, "zinc", cuts=fj5$brks, col.regions=pal, key.space="right",
main="Fisher")
grid.arrange(p1,p2, ncol = 2)
Consider disease counts \(Y_1, \dots, Y_N\) for \(N\) regions, with corresponding population count \(n_1, \dots, n_N\) for each region.
Define the (raw) observed rates by \(r_i = \frac{Y_i}{n_i}\).
Observation: For regions with small population, disease rates can become over-inflated, i.e. small number problem.
One possible solution is to make use of locally weighted averages. We can obtain a smoothed value for each region by simply averaging the values associated with neighboring regions.
Consider the smoothed rate given by
\[\begin{equation} \tilde\xi_i = \frac{\sum_{j=1}^{N} w_{ij} r_j }{\sum_{j=1}^{N} w_{ij}} \end{equation}\]
where \(w_{ij}\) represents the weights. Possible options for the weights include:
Assuming that \(Y_i \sim Pois(n_i\xi)\) and \(r_i = Y_i/n_i\) ….
To avoid the dominance of regions with small populations, a better alternative is to use the following smoothed rate:
\[ \hat\xi_i = \frac{\sum_{j=1}^{N} w_{ij} Y_j }{\sum_{j=1}^{N} w_{ij} n_j} \] i.e. Taking ratio between the smoothed disease count, and the smoothed population count (derived by non-parametric regression).
library(rgdal); library(spdep)
# Load Data, Select Color Palette
NY8 <- readOGR("NY_data", "NY8_utm18", verbose=FALSE)
pal <- brewer.pal(n=5, "Blues")
# Generate Plot for Unsmoothed Rates i.e. r_i = Y_i / n_i
NY8$r <- NY8$TRACTCAS / NY8$POP8 /5 * 1e+5 # Displaying the rate per 20,000 (/5 * 1e+5)
spplot(NY8, "r", col.regions=pal, main="Unsmoothed Rates",
at=classIntervals(NY8$r, n=5, "fisher")$brks) # Using Natural Breaks
# Using Method of Empirical Bayes -- EBest(cases, population size)
# $estmm is the moment-matched empirical Bayes estimate per region.
NY8$eb <- EBest(NY8$TRACTCAS, NY8$POP8)$estmm * 1e+5 / 5 # Extract empirical Bayes estimates, then scale
f5 <- classIntervals(NY8$eb, n=5, "fisher")$brks
# Widen the outermost breaks so all values fall inside the color scale
f5[6] <- 25; f5[1] <- 5 # Add Max and Min to the breaks
# Generate Plot using Empirical Bayes
spplot(NY8, "eb", col.regions=pal, main="Emp. Bayes Smoother",
at=f5)
Probability mapping refers to a map of p-values instead of disease rates. Consider the model
\[ Y_i \sim Pois(n_i\xi_i) \]
and hypotheses
\[ H_0: \xi_i = \xi \quad \text{vs} \quad H_1:\xi_i \neq \xi \]
for a given \(\xi\). Next, we calculate the probability of being more extreme than our observed count in each region, denoted by \(p_i\) (also known as the one-sided p-value):
\[ p_i = \begin{cases} P(Y_i \geq y_i|\xi_i = \xi) \quad \text{if} \quad y_i > n_i\xi \\ P(Y_i \leq y_i|\xi_i = \xi) \quad \text{if} \quad y_i < n_i\xi \end{cases} \] where \(y_i\) is the observed disease count for region \(i\). Note that \(\xi\) is unknown. However, we can use the global rate
\[ r_\text{global} = \sum_{i=1}^{N}Y_i / \sum_{i=1}^{N}n_i \]
as a reasonable estimator for \(\xi\). (i.e. overall mean risk - ratio of total count against total population)
We might be more interested in the rates that are significantly high/low. Thus, the following plot indicates the regions for which the probability of a higher (red) / lower (blue) rate under the null hypothesis of a constant mean rate is < 0.05.
# Calculate Choynowski Probability map values
choy <- choynowski(NY8$TRACTCAS, NY8$POP8)
# Obtain the indexes for which the p-values are small using which().
# choy$type is already logical, so use it directly instead of
# comparing against TRUE (`== TRUE` / `!= TRUE` are redundant).
id_less <- which(choy$pmap < 0.05 & choy$type) # type - TRUE if observed count LESS than expected
id_more <- which(choy$pmap < 0.05 & !choy$type)
plot(NY8, axes=TRUE)
# Plotting only cases where p-values are small
plot(NY8[id_less, ], col='blue', border=NA, add=TRUE) # where Observed < Expected
plot(NY8[id_more, ], col='red', border=NA, add=TRUE) # where Observed > Expected
legend("topright", fill=c("red", "blue"),
legend=c("Obs. > Exp", "Obs. < Exp"))
EPSG codes are 4-5 digit numbers representing
CRS definitions. You can create a list of EPSG codes using
make_EPSG(). We will be working with the Singapore CRS
(code == 3414).
library(rgdal)
EPSG <- make_EPSG() # creates data frame of EPSG projection codes
# Singapore Land Authority (SLA) uses the SVY21 projection CRS
# Look up the row for EPSG:3414 to see its PROJ.4 definition
EPSG[EPSG$code == 3414,]
## code note
## 1411 3414 SVY21 / Singapore TM
## prj4
## 1411 +proj=tmerc +lat_0=1.36666666666667 +lon_0=103.833333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs +type=crs
## prj_method
## 1411 Transverse Mercator
sg <- readRDS("sg_boundary.rds") # Load SG boundary shapefile
# Axes show projected (SVY21) coordinates in meters
plot(sg,axes = TRUE)
grid()
Use the spTransform function to transform spatial
datasets from one CRS to another. For example, we want to convert the
CRS of sg from SVY21 to WGS84.
library(sp)
# Reproject from SVY21 (projected, meters) to WGS84 (lon/lat degrees)
sg2 <- spTransform(sg, CRS("+proj=longlat +datum=WGS84"))
plot(sg2, axes=TRUE) # axes now show longitude/latitude
Convert DMS objects to decimal degrees using the
as.numeric function, and from numbers to the DMS class using
the dd2dms function.
Qtown_lat <- dd2dms(1.2937, NS=TRUE) # NS TRUE for north/south decimal degrees
Qtown_lon <- dd2dms(103.8125, NS=FALSE) # NS FALSE for east/west decimal degrees
# Print the DMS representation, then convert back to decimal degrees
Qtown_lon; as.numeric(Qtown_lon)
## [1] 103d48'45"E
## [1] 103.8125
In general, spatial data files are typically stored in a shapefile format. Information on each dataset is contained in 3 or more files within the same folder, and are of the form .shp or .dbf etc.
# Reading in data on Shapefile Format
# ogrInfo inspects the layer (driver, geometry type, extent, CRS,
# attribute fields) without loading the data.
ogrInfo("/Users/jyeo_/Desktop/MSc Statistics Coursework/ST5226 Spatial Statistics/Data/NY_data", "NY8cities")
## Source: "/Users/jyeo_/Desktop/MSc Statistics Coursework/ST5226 Spatial Statistics/Data/NY_data", layer: "NY8cities"
## Driver: ESRI Shapefile; number of rows: 6
## Feature type: wkbPoint with 2 dimensions
## Extent: (372236.8 4662141) - (445727.9 4771698)
## CRS: +proj=utm +zone=18 +ellps=WGS84 +units=m +no_defs
## LDID: 0
## Number of fields: 1
## name type length typeName
## 1 names 4 80 String
# readOGR actually loads the layer into a Spatial*DataFrame
cities <- readOGR("/Users/jyeo_/Desktop/MSc Statistics Coursework/ST5226 Spatial Statistics/Data/NY_data", "NY8cities")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/jyeo_/Desktop/MSc Statistics Coursework/ST5226 Spatial Statistics/Data/NY_data", layer: "NY8cities"
## with 6 features
## It has 1 fields
cities; proj4string(cities) # See cities in dataset and proj4string attributes
## class : SpatialPointsDataFrame
## features : 6
## extent : 372236.8, 445727.9, 4662141, 4771698 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=18 +ellps=WGS84 +units=m +no_defs
## variables : 1
## names : names
## min values : Auburn
## max values : Syracuse
## [1] "+proj=utm +zone=18 +ellps=WGS84 +units=m +no_defs"
Spatial data can be read and written with readOGR and writeOGR. Note that the data
first has to be of the WGS84 datum.
# Loading Dengue Clusters data:
den <- readRDS("den_clusters_2015_12_15.rds")
denWGS <- spTransform(den, CRS("+proj=longlat +datum=WGS84")) # Convert to WGS84
# Writing to KML file (for importing into Google Earth):
# writeOGR(denWGS, dsn='den.kml', layer='test', driver='KML')
# Open `den.kml` file in Google Earth
library(RgoogleMaps)
clusters <- c(3, 4, 24, 29, 31, 44, 47) # Selecting dengue clusters of interest
rochor.clusters <- as(denWGS[clusters, ], "SpatialPolygons") # convert to SpatialPolygons
# Extract map from Google Maps using the bbox values (specify longitude and latitude):
bbox(rochor.clusters) # Gives an indication of where the clusters lie within
## min max
## x 103.840903 103.865013
## y 1.306601 1.321899
# Center/zoom chosen to cover the bbox above (Rochor area)
rochor.map <- GetMap(center=c(lat=1.315, lon=103.853), zoom=15)
# Plot polygons on the extracted map:
PlotPolysOnStaticMap(rochor.map, rochor.clusters,
add=FALSE)
Geometry operations are provided by the rgeos package:
gArea - Finding the area of polygons.
gUnaryUnion - Merge 2 or more polygons into a single
polygon. Identifies common boundaries and intersections and reduces
the polygon count accordingly.
gBuffer - Creates a buffer region around a feature of
interest (to highlight the feature).
over - Combining information in spatial objects of
possibly different classes.
library(rgeos)
# Computing Polygon Area:
subzones <- readRDS("sg_subzones.rds"); head(subzones$subzone) # View SG sub-zones
## [1] YUHUA CLEMENTI WOODS SENNETT PAYA LEBAR BIDADARI
## [6] WOODLEIGH
## 310 Levels: ADMIRALTY AIRPORT ROAD ALEXANDRA HILL ALEXANDRA NORTH ... YUNNAN
# byid = TRUE returns one area per polygon instead of the total
sub.area <- gArea(subzones, byid = TRUE); tail(sub.area, n=1) # Computing the area of each sub-zone
## SAMULUN
## 327170.9
# Merging Polygons using gUnaryUnion:
id <- c(20, 87, 181)
plot(subzones[id,], axes=TRUE) # Plotting subset of sub-zones
# Dissolves shared boundaries among the selected sub-zones
union <- gUnaryUnion(subzones[id,])
plot(union, axes=TRUE) # Note difference in plots upon union of sub-zone polygons
# Creating a Buffer for the PIE Expressway
express <- readRDS("sg_expressways.rds")
PIE.index <- express$name == "Pan-Island Expressway"
PIE <- express[PIE.index,] # Filtering out PIE data from Expressway Dataset
# width is in the CRS's units (presumably meters here — TODO confirm CRS)
out <- gBuffer(PIE, width=1000)
# Generating Plots
plot(sg)
plot(out, col=grey(0.5, 0.2), border=NA, add=TRUE) # Plot of buffer region, grey(level,alpha) for color specification
plot(PIE, col='red', add=TRUE) # Plot PIE points
### over
# Applying over on meuse (Spatial Points) and meuse.grid (Spatial Pixels)
# over(x, y): for each feature of x, which feature(s) of y does it
# fall in? Default returns the first match per feature.
index <- over(meuse, meuse.grid) # at the spatial locations of x, retrieve indexes or attributes from y
head(index) # i.e. 1st location in meuse lies in 9th cell of the grid etc.
## 1 2 3 4 5 6
## 9 24 28 41 93 128
# index.all is a list with the same length as meuse. The 1st item in
# index.all holds the indices of meuse.grid that the 1st feature in
# meuse intersects with, and so forth.
index.all <- over(meuse, meuse.grid, returnList=TRUE) # set returnList argument = TRUE
head(index.all, n = 3)
## $`1`
## [1] 9
##
## $`2`
## [1] 24
##
## $`3`
## [1] 28
# Toy example: a single point at (1.1, 1.1)
x <- SpatialPoints(coords=matrix(c(1.1,1.1), nrow = 1))
# 3x3 grid: cell centers at offset (0,0), cell size (1,1)
g <- GridTopology(c(0,0),c(1,1),c(3,3)); y <- SpatialGrid(g)
# Point over Grid
plot(y, axes = TRUE);plot(x, add = TRUE)
over(x,y) # the point falls in cell 5 of the grid
## 1
## 5
# Grid over Grid
# Second grid shifted by (0.1, 0.1); each of its cells overlaps cell i
g2 <- GridTopology(c(0.1,0.1),c(1,1),c(3,3)); x2 <- SpatialGrid(g2)
over(x2,y)
## 1 2 3 4 5 6 7 8 9
## 1 2 3 4 5 6 7 8 9